saving = FALSE
#' Plot the ggplot for the percentage composition for different class in different
#' datasets.
#'
#' @param seuratObject the seurat object containing the data
#' @param classLabel string indicating which metadata column in seurat object is for
#' the class label (y-axis) for the tile plot
#' @param dataLabel string indicating the metadata column for dataset label (x-axis)
#' @param colorOption color option fot viridis. default is "G" but can change to
#' other letter
#'
#' @author Jieran Sun
plotDataComposition <- function(seruatObject, classLabel, dataLabel, colorOption = "G") {
ca <- data.table(table(cluster=unlist(seruatObject[[classLabel]]),
sample=unlist(seruatObject[[dataLabel]])))
ca[, percent := 100*(N/sum(N)), by = sample]
plot_cluster_comp <- ggplot(as.data.frame(ca), aes(sample, cluster, fill=percent)) +
geom_tile() +
scale_fill_gradientn(colors = viridis_pal(option = colorOption, end = 0.95, direction = -1)(10)) +
geom_text(aes(label= round(percent, digits = 2)) , color = "white") +
coord_fixed(ratio = 0.25) +
guides(fill = guide_colourbar(barwidth = 0.5,barheight = 18))
}
#' Plot a heatmap object using sechm package to indicate the expression level for
#' different marker genes with their corresponding cell type annotation
#'
#' @param sceObject the singleCellExperiment object to input
#' @param markerList the list of marker genes for plotting. The marker gene list
#' should have corresponding cell type as their name. A typical list should
#' contain several array objects for marker genes with each of them named by the
#' corresponding cell types.
#' @param cellType string or list of string indicating which cell type(s) are used for
#' plotting
#' @param heatmapColor color option for heatmap in viridis
#' @param gaps_col if adding column gaps in the heatmap (in the case of plotting
#' merged sceObject with different column indication)
#'
#' @author Jieran Sun
plotSpecificHeatmap <- function(sceObject, markerList, cellType = NULL, heatmapColor = "C", gaps_col = NULL){
if (is.null(cellType) == FALSE) {
markerList <- markerList[cellType]
}
# check markerList
if (is.null(names(markerList))) {
names(markerList) <- paste("list", seq_along(markerList), sep = "-")
}
# Assign a new metadata called cell type to assign the markers on them
assayNames(sceObject) <- "logcounts"
rowData(sceObject)$cellType <- NA
rowData(sceObject)[unlist(markerList),"cellType"] <- rep(names(markerList),lengths(markerList))
# Generate the heatmap based on the selection
geneToPlot <- unlist(markerList)
sechm::sechm(sceObject, geneToPlot,
assayName = "logcounts", gaps_row = "cellType", gaps_at = gaps_col,
show_colnames = TRUE, do.scale = TRUE, na_col = "black",
breaks=1, row_title_rot=0, show_rownames = TRUE,
hmcols = viridis_pal(option = heatmapColor)(100))
}
#' Adjusting the marker list assignment based on the heatmap expression. The heatmap
#' expression is used as the heatmap counts are all normalized on a cluster level.
#' By defining a threshold for the assignment, the function helps rearranging the
#' marker gene from the one cell type (oldCellType) to another (adjustedCellType)
#'
#' @param heatmapObject the heatmap to use for adjusting the markergene assignment
#' @param cluster which cluster expression is used to adjust the marker gene
#' assignment level. it can be a string or a vector of string indicating the
#' name of the cluster. If it is a vector of string, please make sure it have
#' the same length as threshold, oldCellType and adjustedCellType
#' @param threshold double or vector of double indicating the threshold of expression
#' for cluster reassignment
#' @param oldCellType/adjustedCellType string or vector of string for the change of
#' marker gene assignment to cell type lists. should have the same length. names
#' in adjusted cell type should exist in the markerList
#' @param visualization plot the histogram for the expression level in the cluster
#' @param saving save the new marker list
#' @param filePath if save the new marker list, what is the file path
#'
#' @author Jieran Sun
AdjustMarkerAssignment <- function(heatmapObject, markerList, cluster,
threshold, oldCellType, adjustedCellType,
visualization = FALSE, saving = FALSE, filePath = NULL){
newMarkerList <- markerList
# TODO: Check if the dimension agrees
if (length(cluster) > 1 & length(threshold) == 1) {
threshold <- rep(threshold, length(cluster))
}
geneOfInterest <- lapply(seq_along(cluster), function(ind) {
heatmapObject@row_names_param$labels[which(heatmapObject@matrix[,cluster[ind]] > threshold[ind])]
})
for (geneList in geneOfInterest) {
for (gene in geneList) {
if (gene %in% markerList[oldCellType]) {
newMarkerList[adjustedCellType] <- append(newMarkerList[adjustedCellType], gene)
newMarkerList[oldCellType] <- newMarkerList[oldCellType][-which(gene == newMarkerList[oldCellType])]
}}}
if (saving) {
cellType <- names(newMarkerList)
MarkerDF <- data.frame(cellType = cellType,
markers = I(unlist(lapply(newMarkerList, paste, collapse=","))))
write.csv(MarkerDF, file=filePath, quote=FALSE, row.names=FALSE)
}
return(newMarkerList)
}
#' Plot the overall heatmap for the singleCellExperiment Object with different
#' colors indicating different cell type and a legend for information (optional)
#'
#' @param sceObject the singleCellExperiment object to plot
#' @param markerList the list of marker genes for plotting. The marker gene list
#' should have corresponding cell type as their name. A typical list should
#' contain several array objects for marker genes with each of them named by the
#' corresponding cell types.
#' @param metaData an optional list corresponding the cell type acronym to the
#' full name. metaData file is essentially a vector of character (full names)
#' with the acronym as the name for the characters. the full name will be used
#' for the legend. If the metaData is NULL the system will just use the acronym
#' from the markerlist as the legend name
#' @param showAll do we plot all the cell types in the markerList?
#' @param cellType only if showAll is FALSE, we specify marker genes from which
#' cell type to plot
#' @param heatmapColor color options from viridis
#' @param clusterRow do we cluster the row? if so the makerGene-cellType correspondence
#' cannot be maintained
#' @param clusterCOl do we cluster the column such that similar cell type/cluster are
#' plot next to each other
#' @param showLegend if we want to show legend
#'
#' @author Jieran Sun
#'
#' Notice: no. of cell type/legend <= 16! (for color plotting purpose)
showOverallHeatmap <- function(sceObject, markerList, metaData, showAll = TRUE, cellType = NULL,
heatmapColor = "C", clusterRow = FALSE, clusterCol = TRUE,
showLegend = TRUE) {
if (showAll != TRUE) {
if (is.null(cellType)) {
stop("Please specify cell types to show.")
}
markerList <- markerList[cellType]
}
if (showLegend == TRUE) {
# Generate heatmap block and anntoation color
if(is.null(metaData)) {
cellname <- names(markerList)
} else {
cellname <- c(metaData[names(markerList)])
}
# Set up the color for the heatmap
legendColors <- pal_simpsons()(length(cellname))
names(legendColors) <- cellname
legendColors <- list(type = legendColors)
# Set up the annotation matrix
annoDF <- data.frame(row.names=unlist(markerList),
type=rep(cellname, lengths(markerList)))
pheatmap(assay(sceObject)[unlist(markerList),],
color = viridis_pal(option = heatmapColor)(100),
annotation_row = annoDF,
annotation_colors = legendColors,
gaps_row = cumsum(lengths(markerList)),
split=rep(cellname, lengths(markerList)),
cluster_rows=clusterRow, cluster_cols = clusterCol,
scale="row")
} else {
pheatmap(assay(sceObject)[unlist(markerList),],
color = viridis_pal(option = heatmapColor)(100),
gaps_row = cumsum(lengths(markerList)),
split=rep(cellname, lengths(markerList)),
cluster_rows=clusterRow, cluster_cols = clusterCol,
scale="row")
}
}
#' Find the top expression genes in each cluster/cell type in the sceObject
#'
#' @param sceObject singleCellExperiment object. The object should be aggregated
#' already with column indicating cell types or clusters
#' @param markerlist the list of marker genes for plotting. The marker gene list
#' should have corresponding cell type as their name. A typical list should
#' contain several array objects for marker genes with each of them named by the
#' corresponding cell types.
#' @param n number of top expression gene to return
#' @param allMarker do we return all the marker gene on the list?
#' @param cellType if allMarker is FALSE, which cell type of genes should we return
#'
#' @return a data table with column as the cluster/cell type depending on the column of
#' the sceObject. And n number of rows indicating which genes are the most expressing
#'
#' @author Jieran Sun
findTopGenes <- function(sceObject, markerlist, n = 20, allMarker = TRUE, cellType = NULL){
if (allMarker != TRUE) {
if (is.null(cellType)){
stop("Specify the cell type for gene return! /n")
}
markerlist <- unlist(markerlist[cellType])
}
topMat <- data.table(assay(sceObject)[unlist(markerlist),], keep.rownames = TRUE)
topMat <- topMat[, lapply(.SD, function(x){
Best_n <- sort(x, index.return=TRUE, decreasing=TRUE)
rn[Best_n$ix[1:n]]
}), .SDcols = colnames(topMat)[-1]]
return(topMat)
}
#' Plot a list of Violin plot/Ridge plot/Heatmap plot given a list of genes.
#'
#' @param conutObject either the seurat object (for violin and ridge plot) or the
#' sceObject (for the heatmap plot)
#' @param feature_list essentially the markerList in the previous functions
#' @param plot_type type of plot to return, can be Violin plot ("Vln"), ridge
#' plot ("Rdg"), and heatmap ("Heatmap")
#' @param group_var columns on the metadata file in the seurat object indicating
#' if we're using clusters or cell types (for heatmap it can be NULL, but it
#' need to be used for the pdf generated if saving is TRUE, so maybe just
#' keep it as the same way)
#' @param saving if we want to save the final results
#'
#' @return a list of plots based on the parameters
plotMarkerList <- function(countObject, feature_list, plot_type = c("Vln", "Rdg", "Htm"),
group_var = c("orig.ident", "seurat_clusters","label_calibrated","label_auto"),
saving = FALSE){
if (is.list(feature_list[[1]]) != TRUE) {
feature_list <- list(feature_list)
}
Plot_list <- lapply(seq_along(feature_list), function(ind) {
feature <- feature_list[[ind]]
if (plot_type == "Vln") {
Plot <- VlnPlot(countObject, features = feature,
group.by = sprintf("%s", group_var), cols = pal_simpsons()(16),pt.size = 0) +
plot_annotation(title = sprintf("list_%s", ind))
} else if (plot_type == "Rdg") {
Plot <- RidgePlot(countObject, features = feature,
group.by = sprintf("%s", group_var), cols = pal_simpsons()(16)) +
plot_annotation(title = sprintf("list_%s", ind))
} else if (plot_type == "Htm") {
Plot <- sechm::sechm(countObject, feature,
assayName = "logcounts", cluster_cols = FALSE,
show_colnames = TRUE, do.scale = TRUE, na_col = "black",
breaks=1, row_title_rot=0, show_rownames = TRUE,
hmcols = viridis_pal(option = "C")(50))
}
return(Plot)
})
if (saving == TRUE) {
label <- strsplit(group_var, "[.]|_")[[1]][2]
pdf(file.path(working_path, sprintf("Visualization/Markers_%s_%s.pdf", plot_type, label)),
width = 20, height = 20)
print(Plot_list)
dev.off()
}
return(Plot_list)
}
# three different samples: 100-WT, 101 - GpnmbKO, 102-GpnmbKO)
seurat_list <- lapply(list.files( data_path, pattern = "*.h5", full.name =TRUE),
function(file){
file_ind <- which(list.files( data_path, pattern = "*.h5", full.name =TRUE) == file)
seurat_object <- Seurat::Read10X_h5(file , use.names = TRUE, unique.features = TRUE) %>%
CreateSeuratObject(project = c("WT", "GpnmbKO_1", "GpnmbKO_2")[file_ind], min.cells = 3, min.features = 200)
seurat_object$stim <- sprintf("cond%s", file_ind-1)
seurat_object[["percent.mt"]] <- PercentageFeatureSet(seurat_object, pattern = "^mt-")
seurat_object[["percent.rb"]] <- PercentageFeatureSet(seurat_object, pattern = "^Rp[sl]")
seurat_object
})
# Leaving only the intersected genes
seurat_list <- lapply(seurat_list, function(x) rownames(x)) %>%
{Reduce(intersect, .)} %>%
{lapply(seurat_list, function(x) x[.,])}
# Roughly merged the genes together
seurat_combi <- merge(seurat_list[[1]], seurat_list[-1], project = "CombinedSeurat")
## Warning in CheckDuplicateCellNames(object.list = objects): Some cell names are
## duplicated across objects provided. Renaming to enforce unique cell names.
rm(seurat_list)
Plot Vplot for QC visualization
(VP_Overall <- VlnPlot(seurat_combi, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"),
group.by = "orig.ident", cols = pal_npg("nrc")(9),
ncol = 4, pt.size = 0) + xlab("conditions") )
# QC processing (remove ribosome and mitochrondial RNA)
# try 7 next time? rb was 10 now 15
mt <- 7
rb <- 15
seurat_combi <- subset(seurat_combi, subset = nFeature_RNA > 200 & nFeature_RNA < 10000 & percent.mt < mt & percent.rb < rb )
# Recheck the violin plot after QC
(VP_afterQC <- VlnPlot(seurat_combi, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"),
group.by = "orig.ident", cols = pal_npg("nrc")(9),
ncol = 4, pt.size = 0) + xlab("conditions"))
ver = FALSE
set.seed(2023)
# Normalize, scale and run PCA on the seurat object
seurat_combi <- seurat_combi %>%
NormalizeData(verbose = ver) %>%
FindVariableFeatures(nfeatures = 3000, verbose = ver) %>%
ScaleData(verbose = ver) %>%
RunPCA(npcs = 50, verbose = ver)
#' Select essential PCs for clustering
#' PC is selected based on 2 criteria, either the no. of pcs can already
#' expalined 80% of the variables and the current one falls under 5%, or
#' the acculumated change in the variance explained in PC fall under 0.1%
pct <- seurat_combi[["pca"]]@stdev / sum(seurat_combi[["pca"]]@stdev) * 100
choice1 <- which(cumsum(pct) > 80 & pct < 5)[1]
choice2 <- sort(which((pct[1:length(pct) - 1] - pct[2:length(pct)]) > 0.1 & cumsum(pct) > 60 ), decreasing = T)[1] + 1
## Warning in (pct[1:length(pct) - 1] - pct[2:length(pct)]) > 0.1 & cumsum(pct) >
## : longer object length is not a multiple of shorter object length
pcs <- min(choice1, choice2, 40)
# Plot an elbow plot to visualize the selection (Sanity check)
Seurat::ElbowPlot(seurat_combi, ndims= 50) + ylab("variance explained (%)") +
geom_point(x = pcs, y = seurat_combi[["pca"]]@stdev[pcs] , colour = "red") +
geom_label(
label=sprintf("PC selected: %1.0f",pcs),
x=pcs,
y=seurat_combi[["pca"]]@stdev[pcs] + 1,
label.padding = unit(0.55, "lines"), # Rectangle size around label
label.size = 0.35,
color = "black",
fill="#69b3a2")
set.seed(2023)
# Run Harmony integration on the dataset, and find the clusters
seurat_combi <- RunHarmony(seurat_combi, group.by.vars = "orig.ident",
dims.use = 1:pcs, max.iter.harmony = 50, verbose = ver)
## Warning: Invalid name supplied, making object name syntactically valid. New
## object name is Seurat..ProjectDim.RNA.harmony; see ?make.names for more details
## on syntax validity
seurat_combi <- FindNeighbors(seurat_combi, reduction = "harmony", dims = 1:pcs, verbose = ver)
seurat_combi <- FindClusters(seurat_combi, resolution = 0.6, verbose = ver) # used to be 0.5
# Run UMAP based on the clsuter information
seurat_combi <- RunUMAP(seurat_combi, reduction = "harmony", dims = 1:pcs, verbose = ver)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
# UMAP Plot colored based on clusters
Plot1 <- DimPlot(seurat_combi, group.by = "seurat_clusters", label = TRUE, label.color = "black", pt.size = 0.3, cols = pal_simpsons()(16)) + plot_annotation(title="Clusters") + NoAxes()
# UMAP plot colored based on data sources
Plot2 <- DimPlot(seurat_combi, group.by = "orig.ident",pt.size = 0.3, cols = pal_simpsons()(3)) +
plot_annotation(title="Conditions") + NoAxes()
# Visualize the two plots
(plot_UMAP <- Plot1 + Plot2)
(plot_clusterComp <- plotDataComposition(seruatObject = seurat_combi, classLabel = "seurat_clusters", dataLabel = "orig.ident"))
Here we plot the heatmap for all the marker genes found in the dataset, regardless of their expression level.
old_way_loading = FALSE
if (old_way_loading){
# Import list of marker genes
marker_list <- readxl::read_excel(file.path(working_path,"Markers/Marker_list_BU.xlsx"))[-10]
Meta_Data <- unlist(readxl::read_excel(file.path(working_path,"Markers/Marker_list_BU.xlsx"))[10])
meta_data <- unlist(data.table::tstrsplit(Meta_Data, ":", fixed = TRUE, keep = 2))
names(meta_data) <- unlist(tstrsplit(Meta_Data, ":", fixed = TRUE, keep = 1))
meta_data <- meta_data[!is.na(meta_data)]
# Filter and leave only those marker genes existed in the cluster
exist_list <- lapply(marker_list, function(x){
x <- x[!is.na(x)]
x <- x[x %in% row.names(seurat_combi)]})
} else{
# Extract the data directly from the system
exist_list <- readRDS("Markers/marker_list.rds")
meta_data <- exist_list[[length(exist_list)]]
exist_list <- lapply(exist_list[-length(exist_list)], function(x){
x <- x[!is.na(x)]
x <- x[x %in% row.names(seurat_combi)]})
}
#' If we plot the overall heatmap (The process is time-consuming and require computing power), notice
#' the large heatmap only provide limited information and is very time-consuming. By default we don't
#' run this heatmap but if you want a detailed overview of the dataset/dataset is relatively small then
#' you can use it for an overview.
OverallHeatmap = FALSE
if (OverallHeatmap){
# Find differential marker genes in across different clusters
seurat_markers <- Seurat::FindAllMarkers(seurat_combi, min.pct = 0.2, test.use = "bimod", verbose = FALSE) %>%
group_by(cluster) %>%
subset(gene %in% unlist(exist_list))
# Plot the heatmap
(heatmap_feature <- DoHeatmap(seurat_combi, features = unique(seurat_markers$gene), group.colors = pal_simpsons()(16)))
}
# Create SingleCellExperiment object for easier manipulation
logcounts <- GetAssayData(seurat_combi)
seurat_sce <- SingleCellExperiment(assays = logcounts,
colData = seurat_combi@meta.data,
rowData = rownames(seurat_combi))
assayNames(seurat_sce) <- "logcounts"
# Find the average expression of each gene in each cluster
sce_cluster <- muscat::aggregateData(seurat_sce, "logcounts", by=c("seurat_clusters"), fun="mean")
SummarizedExperiment::assayNames(sce_cluster) <- "logcounts"
# Assign a new metadata called cell type to assign the markers on them
rowData(sce_cluster)$cellType <- NA
rowData(sce_cluster)[unlist(exist_list),"cellType"] <- rep(names(exist_list),lengths(exist_list))
# Visualizing the heatmap data of the expression value
(heatmap_ClusterOverview <- plotSpecificHeatmap(sce_cluster, exist_list))
# Return average expression of genes in each cell type in each cluster
sce_cellType <- assay(sce_cluster)[unlist(exist_list),]
sce_cellType <- aggregate(t(scale(t(sce_cellType))),
by=list(type=rep(names(exist_list), lengths(exist_list))),
FUN=mean)
sce_cellType <- as.data.table(sce_cellType)
# for each column (cluster), we select the row (cell type) which has the maximum aggregated value
top_number <- 4
sce_cellType <- sce_cellType[, lapply(.SD, function(x){
Best_n <- sort(x, index.return=TRUE, decreasing=TRUE)
type[Best_n$ix[1:top_number]]
}), .SDcols = colnames(sce_cellType)[-1]]
sce_cellType
## 0 1 2 3 4 5 6 7 8 9 10
## 1: HSM HSM DAM WAM HSM HSM CAM IFM IFM Cell_Cycle SAM
## 2: SAM WAM CAM DAM PGM PGM IRM HSM PGM SAM IRM
## 3: Cell_Cycle IRM PGM ATM IRM WAM PGM SAM IRM HSM ATM
## 4: CAM DAM ATM PGM ATM IRM WAM Cell_Cycle WAM ATM Cell_Cycle
## 11
## 1: CAM
## 2: IFM
## 3: Cell_Cycle
## 4: HSM
acronym <- TRUE
# Assign automatic cell type based on the highest expressed gene
cellType_auto <- if (acronym == TRUE) unlist(sce_cellType[1,]) else meta_data[unlist(sce_cellType[1,])]
label_auto <- paste0(cellType_auto, " (", colnames(sce_cellType), ")")
# After tuning the results, need to change every time you change some parameters
cellType_calibrated <- cellType_auto
if (rb == 15 & mt == 7) {
# clustering resolution 0.6
cellType_calibrated[c(8,9)] <- if (acronym==TRUE) c("IFM-I", "IFM-II") else
c("Inflammatory microglia-I", "Inflammatory microglia-II")
cellType_calibrated[c(3)] <- if (acronym==TRUE) c("PAM") else c("Pre-active microglia")
} else if (rb == 10 & mt == 7 | rb == 10 & mt == 5) {
cellType_calibrated[c(9,10)] <- if (acronym==TRUE) c("IFM-I", "IFM-II") else
c("Inflammatory microglia-I", "Inflammatory microglia-II")
cellType_calibrated[c(4)] <- if (acronym==TRUE) c("PAM") else c("Pre-active microglia")
cellType_calibrated[c(5)] <- if (acronym==TRUE) c("DAM") else c("Disease-Associated Microglia")
cellType_calibrated[c(12)] <- if (acronym==TRUE) c("SAM") else c("Senescence-associated microglia")
}
label_calibrated <- paste0(cellType_calibrated, " (", colnames(sce_cellType), ")")
# we convert the cells' cluster labels to cell type labels:
seurat_sce$label_auto <- cellType_auto[seurat_sce$seurat_clusters]
seurat_sce$label_cali <- cellType_calibrated[seurat_sce$seurat_clusters]
# we aggregate again to pseudo-bulk using the new clusters
sce_labelAuto <- aggregateData(seurat_sce, "logcounts", by=c("label_auto"), fun="mean")
sce_labelCali <- aggregateData(seurat_sce, "logcounts", by=c("label_cali"), fun="mean")
# we plot again the expression of the markers as a sanity check
assayNames(sce_labelAuto) <- "logcounts"
assayNames(sce_labelCali) <- "logcounts"
seurat_combi$label_auto <- cellType_auto[seurat_sce$seurat_clusters]
seurat_combi$label_calibrated <- cellType_calibrated[seurat_sce$seurat_clusters]
if (saving){
saveRDS(seurat_combi, "Annotated_seurat_harmony.rds")
}
plot_labelAuto <- DimPlot(seurat_combi, group.by = "label_auto",pt.size = 0.3, cols = pal_npg("nrc")(10), label = TRUE) +
plot_annotation(title="Automatic cluster labels") + NoAxes()
plot_labelCali <- DimPlot(seurat_combi, group.by = "label_calibrated",pt.size = 0.3, cols = pal_futurama("planetexpress")(12), label = TRUE) +
plot_annotation(title="Calibrated cluster labels") + NoAxes()
(plot_UMAPlabel <- plot_labelAuto + plot_labelCali)
(hmCluster <- showOverallHeatmap(sce_cluster, markerList = exist_list, metaData = meta_data))
(hmClusterGene <- showOverallHeatmap(sce_cluster, markerList = exist_list, metaData = meta_data, clusterRow = TRUE))
(hmLabelAuto <- showOverallHeatmap(sce_labelAuto, markerList = exist_list, metaData = meta_data))
(hmLabelCali <- showOverallHeatmap(sce_labelCali, markerList = exist_list, metaData = meta_data))
if (saving == TRUE){
pdf(file.path(working_path, "Visualization/Overall_Heatmaps.pdf"), width = 15, height = 15)
print(hmCluster)
plot.new()
print(hmClusterGene)
plot.new()
print(hmLabelAuto)
plot.new()
print(hmLabelCalis)
plot.new()
dev.off()
}
(heatmap_cluIRM <- plotSpecificHeatmap(sce_cluster, exist_list, "IRM"))
(heatmap_cluATMDAM <- plotSpecificHeatmap(sce_cluster, exist_list, c("ATM","DAM")))
(heatmap_labATMDAM <- plotSpecificHeatmap(sce_labelAuto, exist_list, c("ATM","DAM")))
if (saving == TRUE){
pdf(file.path(working_path, "Visualization/DAM_IRM_ITM_name.pdf"), width = 15, height = 15)
print(heatmap_showrowname_1)
print(heatmap_showrowname_2)
dev.off()
}
links <- data.table(table(source=unlist(paste0("Cluster_", seurat_combi$seurat_clusters)),
target=unlist(seurat_combi$label_calibrated))) %>%
.[N != 0,] %>% .[order(-N),]
nodes <- data.frame(
name=c(as.character(links$source), as.character(links$target)) %>%
unique()
)
links$IDsource <- match(links$source, nodes$name)-1
links$IDtarget <- match(links$target, nodes$name)-1
my_color <- "d3.scaleOrdinal() .domain([' Cluster_0 ',' Cluster_1 ',' Cluster_2 ',' Cluster_3 ',' Cluster_4 ',' Cluster_5 ',' Cluster_6 ',' Cluster_7 ',' Cluster_8 ',' Cluster_9 ',' Cluster_10 ',' Cluster_11 ',' CAM ',' Cell_Cycle ',' DAM ',' HSM ',' IFM-I ',' IFM-II ',' PAM ',' SAM ']) .range([' #FED439FF ',' #709AE1FF ',' #8A9197FF ',' #D2AF81FF ',' #FD7446FF ',' #D5E4A2FF ',' #197EC0FF ',' #F05C3BFF ',' #46732EFF ',' #71D0F5FF ',' #370335FF ',' #075149FF ',' #FF6F00FF ',' #C71000FF ',' #008EA0FF ',' #8A4198FF ',' #5A9599FF ',' #FF6348FF ',' #84D7E1FF ',' #FF95A8FF '])"
(sankeyGraph <- sankeyNetwork(Links = links, Nodes = nodes,
Source = "IDsource", Target = "IDtarget",
Value = "N", NodeID = "name",
sinksRight=FALSE, fontSize = 12,
fontFamily = "Helvetica",
colourScale = my_color))
topMatClu <- findTopGenes(sce_cluster, markerlist = exist_list)
topMatLab <- findTopGenes(sce_labelCali, markerlist = exist_list)
if (saving == TRUE) {
write.csv(top_matrix, file.path(working_path,sprintf("TOP_%s_gene_per_matrix.csv", top_number)))
}
seurat_combi$orig.ident[which(seurat_combi$orig.ident == "GpnmbKO_1" | seurat_combi$orig.ident == "GpnmbKO_2")] <- "GpnmbKO"
(plot_cellTypeCompAuto <- plotDataComposition(seruatObject = seurat_combi, classLabel = "label_auto", dataLabel = "orig.ident"))
(plot_cellTypeCompCali <- plotDataComposition(seruatObject = seurat_combi, classLabel = "label_calibrated", dataLabel = "orig.ident"))
if (saving == TRUE){
# for adding the graph
suppressPackageStartupMessages(library(gridExtra))
ClusterTable <- data.table::rbindlist(list(sce_cellType, topMatClu))
CellTypeTable <- topMatLab
pdf(sprintf("Visualization/Visualization_RB%s_MT%s.pdf", rb, mt), width = 16, height = 8)
print(plot_UMAP)
print(plot_labelAuto + plot_labelCali + plot_annotation("Automated vs. Calibrated cell type annotation"))
print(heatmap_ClusterOverview)
plot.new()
text(x = 0, y = 1, adj = c(0,1), labels = "Heatmap clustering the gene")
print(hmClusterGene)
plot.new()
text(x = 0, y = 1, adj = c(0,1), labels = "Heatmap for each cluster")
print(hmCluster)
plot.new()
text(x = 0, y = 1, adj = c(0,1), labels = "Heatmap for each automatically annotated cell type")
print(hmLabelAuto)
plot.new()
text(x = 0, y = 1, adj = c(0,1), labels = "Heatmap for each manually calibrated cell type")
print(hmLabelCali)
print(plot_cellTypeCompAuto + plot_cellTypeCompCali +
plot_annotation("Condition composition in automated vs. calibrated cell type annotation"))
grid::grid.newpage()
grid::grid.text("most likely cell type in each cluster",x = (0.5), y = (0.8))
grid.table(sce_cellType)
grid::grid.newpage()
grid::grid.text("most expressed genes in each cluster",x = (0.5), y = (0.9))
grid.table(topMatClu)
grid::grid.newpage()
grid::grid.text("most expressed genes in each calibrated cell type",x = (0.5), y = (0.9))
grid.table(topMatLab)
dev.off()
}
For differentially expressed genes, find out their expression profile across different clusters
flist_1 <- c("Cd63","Ctsb","Bax","Cd9","Cd68")
flist_2 <- c("P2ry12","P2ry13","Hexb","Selplg","Tmem119")
flist_3 <- c("Nfkbia","Egr1","Zfp36","Nfkbiz","Ier2","Atf3","Ier3","Apoe", "Trem2", "Tyrobp")
flist_4 <- c("Cd74","H2-Eb1","H2-aa","Cxcl2","Pf4","H2-Ab1")
flist_5 <- c("Top2a","Ccl2","Cxcl10","Hmgb2","Mki67")
flist_6 <- c("Cxcl3","Ifitm3","Slfn5","Ccl12","Ifi204","Stat1","Ifit3","Ifit2","Ccl4","Il1b","Rtp4","Ifitmp","Cst7","Ccl3")
flist_7 <- c("H3f3b","Hsp90aa1","Hsp90ab1","Hsp90b1","Hspa5","Hspa8","Jun","Junb","Malat1","Rps16","Zfp36l1","Fos","Egr1")
flist_8 <- c("Cdkn2a","Cdkn1a","Cdkn2d","Casp8","Glb1")
heatmapmarkersFinal <- c("P2ry12","Cx3cr1","P2ry13","Tmem119","Csf1r","Hexb","Mrc1","Ms4a7","Pf4","Cd9","Trem2","Spp1","Itgax","Cd83","Ifit1","Ifit2","Isg15","Ifitm3","Ccl3","Ccl4","Cst7","Il1b","Rtp4","Cd68","Ctsd","Lamp1","Usp18","Cdk1","Mki67","Birc5","Cd36","Cd74","H2-Aa","Cdkn2a","Cdkn1a","Cdkn2d","Casp8","Il1b","Glb1","Serpine1","Il1rl1") #HOXB8 not in the gene list
feature_list <- lapply(list(flist_1, flist_2, flist_3, flist_4, flist_5, flist_6, flist_7, flist_8),
function(x) {x[x %in% rownames(seurat_combi)]})
colLabel <- c(rep("label", ncol(sce_labelCali)), rep("cluster", ncol(sce_cluster)))
sce_combined <- SingleCellExperiment(cbind(assay(sce_labelCali), assay(sce_cluster)),
rowData = rowData(sce_labelCali), colData = colLabel)
hmap_feature <- plotSpecificHeatmap(sce_combined, feature_list, gaps_col = "X")
hmap_marker <- plotSpecificHeatmap(sce_combined, list(Markers=heatmapmarkersFinal), gaps_col = "X")
if (saving == TRUE) {
pdf(file.path(working_path, "Visualization/Heatmap_Markers_Calibrated.pdf"), width = 17, height = 10)
print(hmap_feature)
print(hmap_marker)
dev.off()
}
VP_clu_list <- plotMarkerList(seurat_combi, feature_list, "seurat_clusters", plot_type = "Vln", saving = saving)
VP_ano_list <- plotMarkerList(seurat_combi, feature_list, "label_auto", plot_type = "Vln", saving = saving)
RP_clu_list <- plotMarkerList(seurat_combi, feature_list, "seurat_clusters", plot_type = "Rdg", saving = saving)
RP_ano_list <- plotMarkerList(seurat_combi, feature_list, "label_auto", plot_type = "Rdg", saving = saving)
HM_clu_list <- plotMarkerList(sce_cluster, feature_list, "seurat_clusters",plot_type = "Htm", saving = saving)
HM_ano_list <- plotMarkerList(sce_labelAuto, feature_list, "label_auto", plot_type = "Htm", saving = saving)
# Select a random gene for demonstration
featured_gene <- "P2ry13"
# Vlnplot
VPClu_gene <- VlnPlot(seurat_combi, features = featured_gene,
group.by = "seurat_clusters", cols = pal_simpsons()(16),pt.size = 0) +
xlab("Clusters")
VPLab_gene <- VlnPlot(seurat_combi, features = featured_gene,
group.by = "label_calibrated", cols = pal_futurama("planetexpress")(12) ,pt.size = 0) +
xlab("Cell type")
# ridge plot
RPClu_gene <- RidgePlot(seurat_combi, features = featured_gene, group.by = "seurat_clusters",
cols = pal_simpsons()(16), sort = FALSE, stack = FALSE) +
xlab("Expression value")
RPLab_gene <- RidgePlot(seurat_combi, features = featured_gene, group.by = "label_calibrated",
cols = pal_futurama("planetexpress")(12), sort = FALSE, stack = FALSE) +
xlab("Expression value")
(Plot_gene <- (VPClu_gene | VPLab_gene) / (RPClu_gene | RPLab_gene))
# Dimplot for expression level analysis
color.features <- viridis_pal(option = "C", direction = 1 )(100)
(DP_gene <- FeaturePlot(seurat_combi, features=unlist(exist_list)[1:2], ncol=2, pt.size = 0.5, cols = color.features) & NoAxes())
presentMarker <- c("Trem2","Cd33","Apoe","Sorl1","Mef2c","Spp1","Il1rap","H2-Eb1","Clu","Tyrobp","Ccl3","Ccl4","Ifit1","Myd88","Mtor","Cx3cr1","Sirpa","Mafb","Igf1")
(presentHeatmap <- plotMarkerList(countObject = sce_labelCali, feature_list = presentMarker,
plot_type = "Htm", group_var = "orig.ident"))
## [[1]]
presentVln <- VlnPlot(seurat_combi, features = presentMarker, group.by = "label_calibrated", cols = pal_simpsons()(16),pt.size = 0, ncol = 10)
presentRdg <- RidgePlot(seurat_combi, features = presentMarker, group.by = "label_calibrated", cols = pal_simpsons()(16), ncol = 10)
if (saving == TRUE) {
pdf("Visualization/Marker_Vln_ident.pdf", width = 24, height = 20)
print(presentVln)
print(presentRdg)
dev.off()
pdf("Visualization/Marker_Rdg_ident.pdf", width = 30, height = 8)
print(presentRdg)
dev.off()
}
sessionInfo()
## R version 4.3.0 (2023-04-21)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Monterey 12.2.1
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: Europe/Zurich
## tzcode source: internal
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] pheatmap_1.0.12 sechm_1.8.0
## [3] patchwork_1.1.3 viridis_0.6.3
## [5] viridisLite_0.4.2 networkD3_0.4
## [7] ggsignif_0.6.4 ggsci_3.0.0
## [9] ggplot2_3.4.2 data.table_1.14.8
## [11] dplyr_1.1.2 muscat_1.14.0
## [13] SingleCellExperiment_1.22.0 SummarizedExperiment_1.30.1
## [15] Biobase_2.60.0 GenomicRanges_1.52.0
## [17] GenomeInfoDb_1.36.0 IRanges_2.34.0
## [19] S4Vectors_0.38.1 BiocGenerics_0.45.3
## [21] MatrixGenerics_1.12.0 matrixStats_0.63.0
## [23] harmony_0.1.1 Rcpp_1.0.10
## [25] SeuratObject_4.1.4 Seurat_4.3.0.1
## [27] readxl_1.4.3
##
## loaded via a namespace (and not attached):
## [1] spatstat.sparse_3.0-2 bitops_1.0-7
## [3] httr_1.4.6 RColorBrewer_1.1-3
## [5] doParallel_1.0.17 numDeriv_2016.8-1.1
## [7] tools_4.3.0 sctransform_0.4.0
## [9] backports_1.4.1 utf8_1.2.3
## [11] R6_2.5.1 lazyeval_0.2.2
## [13] uwot_0.1.16 GetoptLong_1.0.5
## [15] withr_2.5.0 sp_2.0-0
## [17] prettyunits_1.1.1 gridExtra_2.3
## [19] progressr_0.13.0 cli_3.6.1
## [21] spatstat.explore_3.2-3 TSP_1.2-4
## [23] labeling_0.4.2 sass_0.4.6
## [25] mvtnorm_1.2-3 spatstat.data_3.0-1
## [27] blme_1.0-5 ggridges_0.5.4
## [29] pbapply_1.7-2 scater_1.29.4
## [31] parallelly_1.35.0 limma_3.56.1
## [33] rstudioapi_0.14 generics_0.1.3
## [35] shape_1.4.6 gtools_3.9.4
## [37] ica_1.0-3 spatstat.random_3.1-6
## [39] Matrix_1.6-1.1 ggbeeswarm_0.7.2
## [41] fansi_1.0.4 abind_1.4-5
## [43] lifecycle_1.0.3 yaml_2.3.7
## [45] edgeR_3.42.2 gplots_3.1.3
## [47] Rtsne_0.16 grid_4.3.0
## [49] promises_1.2.0.1 crayon_1.5.2
## [51] miniUI_0.1.1.1 lattice_0.21-8
## [53] beachmat_2.16.0 cowplot_1.1.1
## [55] pillar_1.9.0 knitr_1.42
## [57] ComplexHeatmap_2.16.0 rjson_0.2.21
## [59] boot_1.3-28.1 future.apply_1.10.0
## [61] codetools_0.2-19 leiden_0.4.3
## [63] glue_1.6.2 V8_4.3.3
## [65] vctrs_0.6.2 png_0.1-8
## [67] Rdpack_2.5 cellranger_1.1.0
## [69] gtable_0.3.3 cachem_1.0.8
## [71] xfun_0.39 rbibutils_2.2.15
## [73] S4Arrays_1.0.6 mime_0.12
## [75] survival_3.5-5 seriation_1.4.2
## [77] iterators_1.0.14 ellipsis_0.3.2
## [79] fitdistrplus_1.1-11 ROCR_1.0-11
## [81] nlme_3.1-162 pbkrtest_0.5.2
## [83] bit64_4.0.5 progress_1.2.2
## [85] EnvStats_2.8.1 RcppAnnoy_0.0.21
## [87] bslib_0.4.2 TMB_1.9.6
## [89] irlba_2.3.5.1 vipor_0.4.5
## [91] KernSmooth_2.23-20 colorspace_2.1-0
## [93] ggrastr_1.0.2 DESeq2_1.40.2
## [95] tidyselect_1.2.0 bit_4.0.5
## [97] curl_5.0.0 compiler_4.3.0
## [99] BiocNeighbors_1.18.0 hdf5r_1.3.8
## [101] DelayedArray_0.26.2 plotly_4.10.2
## [103] scales_1.2.1 caTools_1.18.2
## [105] remaCor_0.0.16 lmtest_0.9-40
## [107] stringr_1.5.0 digest_0.6.31
## [109] goftest_1.2-3 spatstat.utils_3.0-3
## [111] minqa_1.2.6 variancePartition_1.30.2
## [113] rmarkdown_2.21 ca_0.71.1
## [115] aod_1.3.2 XVector_0.40.0
## [117] RhpcBLASctl_0.23-42 htmltools_0.5.5
## [119] pkgconfig_2.0.3 lme4_1.1-34
## [121] sparseMatrixStats_1.12.0 highr_0.10
## [123] fastmap_1.1.1 rlang_1.1.1
## [125] GlobalOptions_0.1.2 htmlwidgets_1.6.2
## [127] shiny_1.7.4 DelayedMatrixStats_1.22.0
## [129] farver_2.1.1 jquerylib_0.1.4
## [131] zoo_1.8-12 jsonlite_1.8.4
## [133] BiocParallel_1.34.1 BiocSingular_1.16.0
## [135] RCurl_1.98-1.12 magrittr_2.0.3
## [137] scuttle_1.9.4 GenomeInfoDbData_1.2.10
## [139] munsell_0.5.0 reticulate_1.32.0
## [141] stringi_1.7.12 zlibbioc_1.46.0
## [143] MASS_7.3-59 plyr_1.8.8
## [145] randomcoloR_1.1.0.1 parallel_4.3.0
## [147] listenv_0.9.0 ggrepel_0.9.3
## [149] deldir_1.0-9 splines_4.3.0
## [151] tensor_1.5 hms_1.1.3
## [153] circlize_0.4.15 locfit_1.5-9.7
## [155] igraph_1.4.2 spatstat.geom_3.2-5
## [157] reshape2_1.4.4 ScaledMatrix_1.8.1
## [159] evaluate_0.21 nloptr_2.0.3
## [161] foreach_1.5.2 httpuv_1.6.9
## [163] RANN_2.6.1 tidyr_1.3.0
## [165] purrr_1.0.1 polyclip_1.10-6
## [167] future_1.32.0 clue_0.3-64
## [169] scattermore_1.2 rsvd_1.0.5
## [171] broom_1.0.5 xtable_1.8-4
## [173] later_1.3.1 tibble_3.2.1
## [175] lmerTest_3.1-3 glmmTMB_1.1.7
## [177] registry_0.5-1 beeswarm_0.4.0
## [179] cluster_2.1.4 globals_0.16.2